summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Fernando Sahmkow <fsahmkow27@gmail.com> 2023-06-28 06:28:13 +0200
committer: Fernando Sahmkow <fsahmkow27@gmail.com> 2023-06-28 19:34:21 +0200
commit: 47d0d292d5cc5f0404e126023279db7decd532ac (patch)
tree: a43e59102a9db2a124c7473842fbf0d661ff628e
parent: Merge pull request #10837 from liamwhite/mali-support (diff)
downloadyuzu-47d0d292d5cc5f0404e126023279db7decd532ac.tar
yuzu-47d0d292d5cc5f0404e126023279db7decd532ac.tar.gz
yuzu-47d0d292d5cc5f0404e126023279db7decd532ac.tar.bz2
yuzu-47d0d292d5cc5f0404e126023279db7decd532ac.tar.lz
yuzu-47d0d292d5cc5f0404e126023279db7decd532ac.tar.xz
yuzu-47d0d292d5cc5f0404e126023279db7decd532ac.tar.zst
yuzu-47d0d292d5cc5f0404e126023279db7decd532ac.zip
-rw-r--r--src/core/core.cpp26
-rw-r--r--src/core/core.h11
-rw-r--r--src/core/gpu_dirty_memory_manager.h112
-rw-r--r--src/core/memory.cpp7
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h26
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h2
-rw-r--r--src/video_core/gpu.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp9
8 files changed, 183 insertions, 14 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp
index b74fd0a58..deefeb301 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -27,6 +27,7 @@
#include "core/file_sys/savedata_factory.h"
#include "core/file_sys/vfs_concat.h"
#include "core/file_sys/vfs_real.h"
+#include "core/gpu_dirty_memory_manager.h"
#include "core/hid/hid_core.h"
#include "core/hle/kernel/k_memory_manager.h"
#include "core/hle/kernel/k_process.h"
@@ -54,6 +55,7 @@
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
+
MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64));
MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64));
MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64));
@@ -540,6 +542,9 @@ struct System::Impl {
std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{};
+
+ std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES>
+ gpu_dirty_memory_write_manager{};
};
System::System() : impl{std::make_unique<Impl>(*this)} {}
@@ -629,10 +634,31 @@ void System::PrepareReschedule(const u32 core_index) {
impl->kernel.PrepareReschedule(core_index);
}
+/// Provides a reference to the GPU dirty memory manager selected by the calling host thread.
+Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() {
+    const std::size_t host_thread_id = impl->kernel.GetCurrentHostThreadID();
+    // Thread IDs that are not a valid index into the per-core array all use the last entry.
+    const std::size_t slot = host_thread_id < Core::Hardware::NUM_CPU_CORES
+                                 ? host_thread_id
+                                 : Core::Hardware::NUM_CPU_CORES - 1;
+    return impl->gpu_dirty_memory_write_manager[slot];
+}
+
+/// Provides a constant reference to the GPU dirty memory manager selected by the calling
+/// host thread.
+const Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() const {
+    const std::size_t host_thread_id = impl->kernel.GetCurrentHostThreadID();
+    // Thread IDs that are not a valid index into the per-core array all use the last entry.
+    const std::size_t slot = host_thread_id < Core::Hardware::NUM_CPU_CORES
+                                 ? host_thread_id
+                                 : Core::Hardware::NUM_CPU_CORES - 1;
+    return impl->gpu_dirty_memory_write_manager[slot];
+}
+
size_t System::GetCurrentHostThreadID() const {
return impl->kernel.GetCurrentHostThreadID();
}
+/// Runs Gather on every per-core GPU dirty memory manager, in array order, passing each of
+/// them the same callback.
+void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) {
+    auto& managers = impl->gpu_dirty_memory_write_manager;
+    for (std::size_t slot = 0; slot < managers.size(); ++slot) {
+        managers[slot].Gather(callback);
+    }
+}
+
PerfStatsResults System::GetAndResetPerfStats() {
return impl->GetAndResetPerfStats();
}
diff --git a/src/core/core.h b/src/core/core.h
index 93afc9303..14b2f7785 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -108,9 +108,10 @@ class CpuManager;
class Debugger;
class DeviceMemory;
class ExclusiveMonitor;
-class SpeedLimiter;
+class GPUDirtyMemoryManager;
class PerfStats;
class Reporter;
+class SpeedLimiter;
class TelemetrySession;
struct PerfStatsResults;
@@ -225,6 +226,14 @@ public:
/// Prepare the core emulation for a reschedule
void PrepareReschedule(u32 core_index);
+ /// Provides a reference to the current GPU dirty memory manager.
+ [[nodiscard]] Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager();
+
+ /// Provides a constant reference to the current GPU dirty memory manager.
+ [[nodiscard]] const Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager() const;
+
+ void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback);
+
[[nodiscard]] size_t GetCurrentHostThreadID() const;
/// Gets and resets core performance statistics
diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h
new file mode 100644
index 000000000..9c3d41d11
--- /dev/null
+++ b/src/core/gpu_dirty_memory_manager.h
@@ -0,0 +1,112 @@
+#pragma once
+
+#include <atomic>
+#include <bit>
+#include <functional>
+#include <mutex>
+#include <utility>
+#include <vector>
+
+#include "core/memory.h"
+
+namespace Core {
+
+/// Records memory ranges passed to Collect so that Gather can later hand them to a callback
+/// in one batch.
+///
+/// Ranges are tracked per page (Memory::YUZU_PAGEBITS) as a 64-bit mask in which every bit
+/// stands for one chunk of `align_size` bytes. The page written most recently lives in the
+/// atomic `current`; pages it replaced are parked in `back_buffer` until Gather runs.
+class GPUDirtyMemoryManager {
+public:
+    GPUDirtyMemoryManager() : current{default_transform} {}
+
+    ~GPUDirtyMemoryManager() = default;
+
+    /// Marks the bytes in [address, address + size) as dirty.
+    ///
+    /// A range that crosses a page boundary is split so that every page receives its own
+    /// record; a zero-sized range records nothing.
+    /// @param address First byte that was written.
+    /// @param size    Number of bytes written.
+    void Collect(VAddr address, size_t size) {
+        const u64 page_size = 1ULL << Memory::YUZU_PAGEBITS;
+        while (size != 0) {
+            const u64 page_offset = address & Memory::YUZU_PAGEMASK;
+            const u64 room_in_page = page_size - page_offset;
+            const size_t chunk = size < room_in_page ? size : static_cast<size_t>(room_in_page);
+            CollectInPage(address, chunk);
+            address += chunk;
+            size -= chunk;
+        }
+    }
+
+    /// Passes every recorded range to `callback` as (start address, size in bytes) and
+    /// forgets it. Each run of consecutive set bits in a page mask produces one call.
+    /// @param callback Receiver of the dirty ranges.
+    void Gather(std::function<void(VAddr, size_t)>& callback) {
+        {
+            std::scoped_lock lk(guard);
+            const TransformAddress t =
+                current.exchange(default_transform, std::memory_order_relaxed);
+            front_buffer.swap(back_buffer);
+            if (IsValid(t.address)) {
+                front_buffer.emplace_back(t);
+            }
+        }
+        // NOTE(review): front_buffer is walked without holding `guard`; this looks like it
+        // expects a single thread to call Gather at a time - confirm against the callers.
+        for (const auto& transform : front_buffer) {
+            size_t offset = 0;
+            u64 mask = transform.mask;
+            while (mask != 0) {
+                // Skip the clean chunks in front of the next dirty run.
+                const size_t empty_bits = std::countr_zero(mask);
+                offset += empty_bits << align_bits;
+                mask = mask >> empty_bits;
+
+                const size_t continuous_bits = std::countr_one(mask);
+                callback((transform.address << Memory::YUZU_PAGEBITS) + offset,
+                         continuous_bits << align_bits);
+                // Shifting a 64-bit value by 64 is undefined, so a full mask is cleared by hand.
+                mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
+                offset += continuous_bits << align_bits;
+            }
+        }
+        front_buffer.clear();
+    }
+
+private:
+    /// Dirty state of one page: `address` is the page index, `mask` holds one bit per chunk.
+    struct alignas(16) TransformAddress {
+        VAddr address;
+        u64 mask;
+    };
+
+    constexpr static size_t align_bits = 6U;
+    constexpr static size_t align_size = 1U << align_bits;
+    constexpr static size_t align_mask = align_size - 1;
+    /// Sentinel stored in `current` while no page is being tracked (fails IsValid).
+    constexpr static TransformAddress default_transform = {.address = ~0ULL, .mask = 0ULL};
+
+    /// Returns true when `address` (a page index) is below 2^39; the sentinel is not.
+    static bool IsValid(VAddr address) {
+        return address < (1ULL << 39);
+    }
+
+    /// Builds a mask with bits [minor_bit, top_bit) set.
+    ///
+    /// `top_bit` is clamped to the width of T and an empty interval yields 0, so no shift
+    /// count can reach the width of T (which would be undefined behaviour).
+    template <typename T>
+    static T CreateMask(size_t top_bit, size_t minor_bit) {
+        constexpr size_t bit_count = sizeof(T) * 8;
+        if (top_bit > bit_count) {
+            top_bit = bit_count;
+        }
+        if (minor_bit >= top_bit) {
+            return T(0);
+        }
+        T mask = ~T(0);
+        mask <<= (bit_count - top_bit);
+        mask >>= (bit_count - top_bit);
+        mask >>= minor_bit;
+        mask <<= minor_bit;
+        return mask;
+    }
+
+    /// Converts a byte range into its page index and the mask of chunks it touches.
+    /// The range is expected to stay inside one page; Collect guarantees that.
+    static TransformAddress BuildTransform(VAddr address, size_t size) {
+        const size_t minor_address = address & Memory::YUZU_PAGEMASK;
+        const size_t minor_bit = minor_address >> align_bits;
+        // Round the end up so that a partially covered last chunk is included.
+        const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
+        TransformAddress result{};
+        result.address = address >> Memory::YUZU_PAGEBITS;
+        result.mask = CreateMask<u64>(top_bit, minor_bit);
+        return result;
+    }
+
+    /// Records a range that lies inside a single page.
+    void CollectInPage(VAddr address, size_t size) {
+        const TransformAddress t = BuildTransform(address, size);
+        TransformAddress tmp, original;
+        do {
+            tmp = current.load(std::memory_order_acquire);
+            original = tmp;
+            if (tmp.address != t.address) {
+                if (IsValid(tmp.address)) {
+                    // A different page is being tracked: park it and start tracking this one.
+                    std::scoped_lock lk(guard);
+                    // Park the value that is actually replaced rather than the snapshot taken
+                    // above: in between, another thread may have added bits to it, or Gather
+                    // may already have taken it (then the sentinel comes back and is skipped).
+                    const TransformAddress previous =
+                        current.exchange(t, std::memory_order_relaxed);
+                    if (IsValid(previous.address)) {
+                        back_buffer.emplace_back(previous);
+                    }
+                    return;
+                }
+                // Nothing is tracked yet; start from an empty mask for this page.
+                tmp.address = t.address;
+                tmp.mask = 0;
+            }
+            if ((tmp.mask | t.mask) == tmp.mask) {
+                // Every chunk of this range is already marked.
+                return;
+            }
+            tmp.mask |= t.mask;
+        } while (!current.compare_exchange_weak(original, tmp, std::memory_order_release,
+                                                std::memory_order_relaxed));
+    }
+
+    std::atomic<TransformAddress> current{};
+    std::mutex guard;
+    std::vector<TransformAddress> back_buffer;
+    std::vector<TransformAddress> front_buffer;
+};
+
+} // namespace Core
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 514ba0d66..60b246bdd 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -13,6 +13,7 @@
#include "common/swap.h"
#include "core/core.h"
#include "core/device_memory.h"
+#include "core/gpu_dirty_memory_manager.h"
#include "core/hardware_properties.h"
#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_process.h"
@@ -678,7 +679,7 @@ struct Memory::Impl {
LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
GetInteger(vaddr), static_cast<u64>(data));
},
- [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); });
+ [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); });
if (ptr) {
std::memcpy(ptr, &data, sizeof(T));
}
@@ -692,7 +693,7 @@ struct Memory::Impl {
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",
sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data));
},
- [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); });
+ [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); });
if (ptr) {
const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
@@ -707,7 +708,7 @@ struct Memory::Impl {
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",
GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0]));
},
- [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(u128)); });
+ [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(u128)); });
if (ptr) {
const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 58a45ab67..9239ad862 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -115,7 +115,21 @@ void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
template <class P>
void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
- memory_tracker.CachedCpuWrite(cpu_addr, size);
+ const bool is_dirty = IsRegionRegistered(cpu_addr, size);
+ if (!is_dirty) {
+ return;
+ }
+ VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE);
+ VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE);
+ if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
+ WriteMemory(cpu_addr, size);
+ return;
+ }
+
+ tmp_buffer.resize_destructive(size);
+ cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size);
+
+ InlineMemoryImplementation(cpu_addr, size, tmp_buffer);
}
template <class P>
@@ -1553,6 +1567,14 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
return false;
}
+ InlineMemoryImplementation(dest_address, copy_size, inlined_buffer);
+
+ return true;
+}
+
+template <class P>
+void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
+ std::span<const u8> inlined_buffer) {
const IntervalType subtract_interval{dest_address, dest_address + copy_size};
ClearDownload(subtract_interval);
common_ranges.subtract(subtract_interval);
@@ -1574,8 +1596,6 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
} else {
buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
}
-
- return true;
}
template <class P>
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index fe6068cfe..4d9bab7f7 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -543,6 +543,8 @@ private:
void ClearDownload(IntervalType subtract_interval);
+ void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
+
VideoCore::RasterizerInterface& rasterizer;
Core::Memory::Memory& cpu_memory;
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index db385076d..f823a1e2b 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -95,7 +95,9 @@ struct GPU::Impl {
/// Synchronizes CPU writes with Host GPU memory.
void InvalidateGPUCache() {
- rasterizer->InvalidateGPUCache();
+ std::function<void(VAddr, size_t)> callback_writes(
+ [this](VAddr address, size_t size) { rasterizer->OnCPUWrite(address, size); });
+ system.GatherGPUDirtyMemory(callback_writes);
}
/// Signal the ending of command list.
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f7c0d939a..a63a29e61 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -570,7 +570,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
- pipeline_cache.OnCPUWrite(addr, size);
+
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
@@ -579,14 +579,11 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.CachedWriteMemory(addr, size);
}
+ pipeline_cache.InvalidateRegion(addr, size);
}
void RasterizerVulkan::InvalidateGPUCache() {
- pipeline_cache.SyncGuestHost();
- {
- std::scoped_lock lock{buffer_cache.mutex};
- buffer_cache.FlushCachedWrites();
- }
+ gpu.InvalidateGPUCache();
}
void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {